
gl dat "\\micro.intra\Projekt\P0624$\P0624_Gem\Politisk Geografi\Data"
gl temp "\\micro.intra\Projekt\P0624$\P0624_Gem\Politisk Geografi\temp"
gl gdat "\\micro.intra\Projekt\P0624$\P0624_Gem\Data Extraction and Files\Data files"
gl do "\\micro.intra\Projekt\P0624$\P0624_Gem\Politisk Geografi\Dofiler\Replication_v2"
gl res "\\micro.intra\Projekt\P0624$\P0624_Gem\Politisk Geografi\Results"

cap log close
log using "$res\log\setup.log", replace

set type double, permanently 


*** raw files used in analysis ****
/*
coordinates.dta             // geo coded variables of where all individuals live from Statistics Sweden
valdistrikt_alla 		// election districts from statistics Sweden, district level
distrikt_0210.dta			//election districts from Statistics Sweden, individual level  
Birthyear_sex.dta 			// variables from statistics Sweden
Birth_region.dta 			// variables from statistics Sweden
Kommundata.dta 				// aggregated municipal level data from Statistics Sweden
pol_id_complete 		// id-codes for everyone that has been a politician from Statistics Sweden
nom_ko_X.dta			// nominated politicians year X from Statistics Sweden
valda_ko_X.dta			// elected politicians year X from Statistics Sweden
parl dist 2010.dta"  // election results from Valmyndigheten 2010
mun dist 2010.dta    // election results from Valmyndigheten 2010
roster parti distrikt alla val X.dta // election results from Valmyndigheten for year X
all_price_transform.dta   // Housing prices from Mäklarstatistik
tenure500_midpoint.dta   // Data on home ownership from GeoSweden
mun competition.dta // Dataset from Folke, =., and Rickne, J. (2020) All the single ladies: Job promotions and the durability of marriage. Amerivan Economic Journal: Applied Economics, 12(1), 260-287

*/

* Data from LISA registry is called directly via odbc load
* Data from Fastighetsregistret (Building register) is called directly via odbc load
* Data from Bygglovsregistret (Building permit register)  is called directly via odbc load


*************************
*****     LISA        ***
*************************
foreach year in 2002 2006 2010{
if `year'==2002	odbc load, exec("select  Sun2000niva,inc=DispInk ,Barn7_10, Barn11_15, p_id=Person_LopNr from P0624_Lisa_2002") dsn("P0624") clear
else odbc load, exec("select  Sun2000niva, inc=DispInk04,Barn7_10, Barn11_15, p_id=Person_LopNr from P0624_Lisa_`year'") dsn("P0624") clear	
gen year =`year' 
if `year'==2002 save "$dat\lisa_togeo", replace
else append using "$dat\lisa_togeo"
save "$dat\lisa_togeo", replace
}

	replace Sun2000niva="" if Sun2000niva=="*" | Sun2000niva=="-"
	destring Sun2000niva, replace force
	replace  Sun2000niva=. if Sun2000niva>=999

gen educ_year=.
replace educ_year= 7.5 if Sun2000niva>=100 & Sun2000niva<200
replace educ_year= 9.4 if Sun2000niva>=200 & Sun2000niva<300
replace educ_year= 11.2 if Sun2000niva>=300 & Sun2000niva<330
replace educ_year= 12.4 if Sun2000niva>=330 & Sun2000niva<400
replace educ_year= 14.2 if Sun2000niva>=410& Sun2000niva<530
replace educ_year= 17 if Sun2000niva>=530 & Sun2000niva<600
replace educ_year= 20.4 if Sun2000niva>=600 & Sun2000niva<=1000

gen child_7_15= Barn7_10+ Barn11_15


joinby p_id using "$dat\Birthyear_sex.dta", unmatched(none)
gen age=year-Fodelse
	

joinby p_id using "$dat\Birth_region.dta", unmatched(master)
drop _merge

g inv1g_ejsv=(FodelseVarldsdel!="Sverige") if !mi(FodelseVarldsdel)
g inv1g_ejn=!inlist(FodelseVarldsdel, "Norden utom Sverige", "Sverige" )  if !mi(FodelseVarldsdel)
g inv1g_eje=!inlist(FodelseVarldsdel, "Norden utom Sverige", "Sverige" , "Nordamerika", "EU28 utom Norden")  if !mi(FodelseVarldsdel)


save "$dat\lisa_togeo", replace


*************************
*****  Politicians    ***
*************************

* Grunddata för politikernas politiska information
****** vi gör en separat set up för det gamla datat då detta är annorlunda än det nya i strukture
	
	cd "\\micro.intra\Projekt\P0624$\P0624_Data"
	use "nom_ko_82.dta", clear
	
	append using "nom_ko_85.dta"
	append using "nom_ko_88.dta"
		foreach var of varlist nrinom listnr ko_valkretskod llkk{
	destring `var' , replace force
	}
	do "$do\mun code clean.do" 
	
	drop if llkk==.
	
	collapse (min) nrinom  , by(lopnr  partikod llkk  ar)
	gen parti_initial = "M" if partikod=="0001"
	replace parti_initial = "S" if partikod =="0002"
	replace parti_initial = "F" if partikod =="0003"
	replace parti_initial = "C" if partikod =="0004"
	replace parti_initial = "V" if partikod =="0005"
	replace parti_initial = "G" if partikod =="0055"
	replace parti_initial = "B" if partikod =="0110"
	replace parti_initial = "G" if partikod =="0055"
	replace parti_initial = "K" if partikod =="0068"
	
	
	replace parti_initial = "L" if  parti_initial=="" & partikod !=""
		
	save "$gdat\temp_rel_nom_old.dta", replace
	
	use "valda_ko_82.dta", clear	
	append using "valda_ko_85.dta"
	append using "valda_ko_88.dta"

	destring llkk, replace force

do "$do\mun code clean.do" 
	gen llkk_2=llkk
	
		gen parti_initial_v = "M" if partinamn=="MODERATA SAMLINGSPARTIET"
	replace parti_initial_v = "S" if partinamn =="ARBETAREPARTIET SOCIALDEMOKRATERNA" |partinamn =="ARBETAREPARTIET-SOCIALDEMOKRATERNA"|partinamn =="ARBETAREPARTIET- SOCIALDEMOKRATERNA" 
	replace parti_initial_v = "F" if partinamn =="FOLKPARTIET"
	replace parti_initial_v = "C" if partinamn =="CENTERN" |partinamn =="CENTERPARTIET" 
	replace parti_initial_v = "V" if substr(partinamn, 3, 9) =="NSTERPART"
	replace parti_initial_v = "G" if substr(partinamn, 1, 4)=="MILJ"
	replace parti_initial_v = "K" if substr(partinamn, 1, 5)=="KRIST"
	replace parti_initial_v = "L" if  parti_initial_v=="" &  partinamn!=""
	
	gen vald=1

	rename nrinom inval_ord
	destring inval_ord, replace
	gen parti_initial=parti_initial_v 
	collapse (max) vald (min) inval_ord, by(llkk parti_initial parti_initial_v llkk_2 ar partinamn lopnr)	
	save "$gdat\temp_vald_old.dta", replace
		
	* gör en vald-indikator
	use "$gdat\temp_rel_nom_old.dta", clear
	joinby lopnr llkk ar parti_initial using "\\micro.intra\Projekt\P0624$\P0624_Gem\Data Extraction and Files\Data files\temp_vald_old.dta", unmatched(both)
	
		foreach var of varlist partikod{
	destring `var' , replace force
	}
	
	save "$gdat\temp_pol_old.dta", replace
		
	use "nom_ko_9111.dta", clear
	* Fixa korrekta listnummer för 1994
	drop if ar == 1994
	append using "2014\nom_ko_2014.dta"
	
	foreach var of varlist nrinom listnr ko_valkretskod{
	destring `var' , replace force
	}

	append using "nom_ko_94"
	replace ar=1994 if ar==.
		destring llkk, replace force

drop if ar==2011
do "$do\mun code clean.do" 
	drop if llkk==.
	rename vald vald_n
	save "$gdat\temp_rel_nom.dta", replace

	use "valda_ko_9111.dta", clear
	append using "2014\valda_ko_2014.dta"
	drop if ar==2011

	destring llkk, replace force
	do "$do\mun code clean.do" 
	gen vald=1

		foreach var of varlist valgrund  ko_valkretskod llkk{
	destring `var' , replace force
	}
	
joinby lopnr ko_valkretskod llkk ar partikod using "$gdat\temp_rel_nom.dta", unmatched(both)
		foreach var of varlist ripartikod partikod{
	destring `var' , replace force
	}

egen tag_indarkrets=tag (lopnr ar ko_valkretskod )

	* summera personröster över valkrets och år	
	bysort lopnr  ko_valkretskod ar: egen pers_roster_b= sum(antpersonrost)
	gen pers_roster=pers_roster_b if tag_indarkrets==1

	* sätt vald=1 om vald i minst en valkrets
	bysort  lopnr ar: egen vald_korr=max(vald)
	
	* sätt vald_lista=1 om man inte är personvald utan vald från en viss lista 
	gen vald_lista= 1 if (valgrund ==1  | valgrund ==2  | valgrund ==5  | valgrund ==6 ) &vald==1

	* väljs slumpmässig observation för varje individ och år
	egen tag_indar=tag (lopnr ar)
	bysort   llkk ko_valkretskod partikod listnr ar: egen valda=sum(vald)
	bysort   llkk ko_valkretskod partikod ar: egen valda_rank=rank(valda) ,field
	bysort   llkk ko_valkretskod partikod ar: egen valda_rank1=max(valda) if valda_rank==1
	bysort   llkk ko_valkretskod partikod ar: egen valda_rank2=max(valda) if valda_rank==2

	gen huvudlista_vk =1
	replace huvudlista_vk=0 if valda_rank!=1 & ((valda_rank1>=(2*valda))|(valda==0 &valda_rank1>1))

	gen nrinom_hl=nrinom if huvudlista_vk==1	
	gen personval=1 if valgrund==1 |valgrund==5|valgrund==3
 replace personval=0 if valgrund==2 |valgrund==6 |valgrund==4
 
 gen nrinom_vikt=nrinom/valda
	
	gen valkrets =ko_valkretskod 
replace valkrets=1 if valkrets==0
gen valar=ar

collapse (min)nrin* (max)  personval vald  (min) ripartikod  llkk (sum) pers_roster, by (partikod lopnr ar)
drop if ar==2011
	
 bysort llkk ar partikod: egen rank_ind= rank (nrinom_vikt) , track
 bysort llkk ar partikod: egen pers_sum= sum (pers_roster) 
 gen pers_share_parti =pers_roster/pers_sum if ar>1997
 
	gen parti_initial = "M" if partikod==1
	replace parti_initial = "S" if partikod ==2
	replace parti_initial = "F" if partikod ==3
	replace parti_initial = "C" if partikod ==4
	replace parti_initial = "V" if partikod ==5
	replace parti_initial = "G" if partikod ==55
	replace parti_initial = "B" if partikod ==110
	replace parti_initial = "G" if partikod ==55
	replace parti_initial = "K" if partikod ==68
	replace parti_initial = "N" if partikod ==170
	replace parti_initial = "L" if  parti_initial=="" & partikod !=.	


append using "$gdat\temp_pol_old.dta"
	gen nom=1

	replace vald=0 if vald==.
	gen Person_LopNr= lopnr
	gen valar = ar
	duplicates tag lopnr ar, gen(test)
	 drop if test!=0 & vald==0
	drop _merge test
		duplicates tag lopnr ar, gen(test)
		drop if test!=0 
		drop test
		ren Person p_id
	save "$dat\mun politician data" , replace


********************************
*****    District and id     ***
********************************
	
use "\\micro.intra\Projekt\P0624$\P0624_Data\distrikt_0210.dta" , clear
gen llkk= substr(llkkdddd,1,4)
gen dist= substr(llkkdddd,5,4)
drop llkkdddd 
ren ar year
ren lop p_id
destring llkk dist , force replace
do "$do\mun code clean.do" 
save "$dat\dist p_id year.dta", replace
 

 
*************************
*****    Property     ***
*************************

odbc load, exec("SELECT * FROM Tab01_Fastighet") dsn("P0624") clear
save "$dat\Fastigheter\fast_geo.dta", replace

odbc load, exec("SELECT * FROM Tab06A_Avregdat") dsn("P0624") clear
save "$dat\Fastigheter\fast_avregdat.dta", replace

odbc load, exec("SELECT * FROM Tab08A_Areal") dsn("P0624") clear
g temp_areal=_n
save "$dat\Fastigheter\areal.dta", replace


odbc load, exec("SELECT * FROM Tab55A_Entre") dsn("P0624") clear
save "$dat\Fastigheter\entre.dta", replace

odbc load, exec("SELECT * FROM Tab55B_Lagenhet") dsn("P0624") clear
g temp_lgh=_n
gen lgh= Lghkat=="1" |  Lghkat=="9" 
gen spec= Lghkat=="2" |  Lghkat=="4" 
gen stud= Lghkat=="3"
joinby RNPENTRE_RIDENTRE_LopNr using "$dat\Fastigheter\entre.dta", unmatched(both)
sort RNPREGBY_RIDREGBY_LopNr temp_lgh
drop temp_lgh
collapse (sum) lgh spec stud  Boarea, by(RNPREGBY_RIDREGBY_LopNr)
save "$dat\Fastigheter\lgh.dta", replace
 

odbc load, exec("SELECT * FROM Tab50A_Registerbyggnad") dsn("P0624") clear
g temp_buildings=_n
destring Byggtyp, replace force
gen b_multi=Byggtyp==33
gen b_single =Byggtyp==30
gen b_chain=Byggtyp==31 |Byggtyp==32
gen b_two=Byggtyp==35

joinby RNPREGBY_RIDREGBY_LopNr using "$dat\Fastigheter\lgh.dta", unmatched(both)
keep b_* Totboarea Totlokarea Byggnstat Byggar Ombyggar Nyregors Avregors  FNR_LopNr lgh spec stud Boarea temp_buildings
save "$dat\Fastigheter\buildings.dta", replace

****koppla på fastigheter
use "$dat\Fastigheter\fast_geo.dta", clear
joinby FNR_LopNr  using "$dat\Fastigheter\fast_avregdat.dta", unmatched(both) 
drop _merge

joinby FNR_LopNr  using "$dat\Fastigheter\buildings.dta", unmatched(both)
gen avregar=yofd(dofc(Avregdat))
drop _merge

joinby FNR_LopNr  using "$dat\Fastigheter\areal.dta", unmatched(master)
destring Byggar, force replace
egen area_basea  = rowtotal (Totboarea Totlokarea)
egen area_base  = rowmax (area_basea Boarea)
foreach type in single multi chain two{
gen b_area_`type'= area_base   if b_`type'==1
gen b_no_`type'=lgh  if b_`type'==1
}
drop area*
gsort FNR_LopNr -Land -Totar temp_areal temp_buildings
egen tag=tag (FNR_LopNr) 
gen land_area=Land if tag==1
replace land_area=Totar if Landar==0  & tag==1
drop _merge- Oskod temp*
save "$dat\Fastigheter\fastbygg.dta", replace

foreach year in 2002 2006 2010 {
use "$dat\Fastigheter\fastbygg.dta", clear 
g temp=_n
keep if Byggar<= `year' & Byggnstat=="1"
ren  Valdist`year' valdist
gen dist=substr(valdist, 5,4)
gen llkk=substr(valdist, 1,4)
destring dist llkk, replace force
do "$do\mun code clean.do"
sort  dist llkk temp
drop temp*
collapse (sum) b_* land lgh, by(dist llkk)
gen year =  `year'
drop if mi(dist)
if `year'==2002 save "$dat\property.dta", replace
else append using "$dat\property.dta"
save "$dat\property.dta", replace
}




 
**********************************************
*****   Match district and coordinates     ***
**********************************************

foreach year in 2002 2006 2010{
odbc load, exec("SELECT * FROM Tab01_Fastighet") dsn("P0624") clear
keep  FNR_LopNr Valdist`year' RutaRT
drop if Valdist==""
joinby FNR_LopNr  using "$dat\Fastigheter\buildings.dta", unmatched(master)
drop temp_buildings
egen fast=tag (FNR_LopNr)

gen xcoord=substr(Ruta,8,7)
gen ycoord=substr(Ruta,1,7)

destring ycoord xcoord, replace force
  
replace ycoord=int(ycoord/500)*500
replace xcoord=int(xcoord/500)*500

collapse (sum) lgh fast, by (Valdist ycoor xcoor)
 
 bysort ycoor xcoor: egen max_lgh=max(lgh)
 keep if lgh==max_lgh
  bysort ycoor xcoor: egen max_fast=max(fast)
 keep if fast==max_fast
 sort ycoord xcoord Valdist*
 duplicates drop ycoor xcoor, force
 keep ycoor xcoor Valdist

 save "$dat\xy_valdist_`year'.dta", replace
 }
 

 
 
 
*************************
*****    Network      ***
*************************

*******  coll   ****
	odbc load, exec("select p_id=Person_LopNr, cfar=CfarNr_LopNr, peorg=Peorgnr_LopNr from P0624_Lisa_2002") dsn("P0624") clear
	gen year=2002
	save "$dat\coll_neighbours.dta", replace
	
	clear
		odbc load, exec("select p_id=Person_LopNr, cfar=CfarNr_LopNr, peorg=Peorgnr_LopNr from P0624_Lisa_2006") dsn("P0624"), 
			gen year=2006
			append using "$dat\coll_neighbours.dta"
			save "$dat\coll_neighbours.dta", replace
			
				clear
		odbc load, exec("select p_id=Person_LopNr, cfar=CfarNr_LopNr, peorg=Peorgnr_LopNr from P0624_Lisa_2010") dsn("P0624")
			gen year=2010
			append using "$dat\coll_neighbours.dta"
			save "$dat\coll_neighbours.dta", replace
			
			joinby  p_id year using "$dat\dist p_id year.dta", unmatched(none)
			
			gen count=1
			bysort year cfar peorg: egen wp_size= sum(count) if  cfar!=. & peorg!=.
			bysort dist year cfar peorg: egen wp_neigh= sum(count)  if  cfar!=. & peorg!=.
			replace wp_neigh= wp_neigh-1
			
			gen valar=year
	joinby p_id valar using "$dat\mun politician data.dta", unmatched(none) 
	
	gen parti_low=lower(parti_initial)
	
*generarar variabel för antalter nominerade och valda från varje parti
foreach parti in b m c k f l g s v{
gen vald_`parti'= vald==1 & parti_low =="`parti'" 
gen vald_ab5coll_`parti'= vald==1 & parti_low =="`parti'" & wp_neigh>=5 & wp_neigh!=.
gen vald_bel5coll_`parti'= vald==1 & parti_low =="`parti'" & (wp_neigh<5 |  wp_neigh==.)
gen vald_coll_`parti'= vald==1 & parti_low =="`parti'" & wp_neigh>=1 & wp_neigh!=.
gen vald_nocoll_`parti'= vald==1 & parti_low =="`parti'" & (wp_neigh<1 |  wp_neigh==.)
 }

 do "$do\mun code clean.do" 
 collapse (sum) vald vald_*  , by (llkk dist year)  

gen valar=year
joinby llkk valar using "$dat\kommundata.dta", unmatched(master)
	foreach gr in vald{
 gen `gr'_maj =mmaj2*`gr'_m +  cmaj2*`gr'_c +  fpmaj2*`gr'_f + kdmaj2*`gr'_k + mpmaj2*`gr'_g + smaj2*`gr'_s + vmaj2*`gr'_v
 gen `gr'_min= (`gr'_m + `gr'_c + `gr'_f + `gr'_k + `gr'_g + `gr'_s + `gr'_v)-`gr'_maj
 
 foreach pow in maj min{
 		bysort llkk year:  egen kom_`gr'_`pow'=sum(`gr'_`pow')
		drop `gr'_`pow'
 }
 
	}
	foreach gr in vald_ab5coll vald_bel5coll vald_coll vald_nocoll{
 gen `gr'_maj =mmaj2*`gr'_m +  cmaj2*`gr'_c +  fpmaj2*`gr'_f + kdmaj2*`gr'_k + mpmaj2*`gr'_g + smaj2*`gr'_s + vmaj2*`gr'_v
 gen `gr'_min= (`gr'_m + `gr'_c + `gr'_f + `gr'_k + `gr'_g + `gr'_s + `gr'_v)-`gr'_maj
 
 foreach pow in maj min{
 		bysort llkk year:  egen kom=sum(`gr'_`pow')
* calculate share of group living in the district
	gen sh_`gr'_`pow' =`gr'_`pow' /kom
	gen shalt_`gr'_`pow' =`gr'_`pow' /kom_vald_`pow'
	drop kom
 } 
 gen diff_maj_min_`gr'=sh_`gr'_maj-sh_`gr'_min
  gen diffalt_maj_min_`gr'=shalt_`gr'_maj-shalt_`gr'_min
 drop sh_`gr'_maj  sh_`gr'_min shalt_`gr'_maj  shalt_`gr'_min `gr'_maj `gr'_min
 }
 

keep year llkk dist diff*
save "$dat\coll_neighbours.dta", replace

	

***** timesave *****
	
odbc load, exec("SELECT * FROM Geo") dsn("P0624") clear
keep X* Y* Lop
ren Lop p_id
joinby p_id using "$dat\pol_id_complete.dta" , unmatched(none)
duplicates drop p_id, force
reshape long XKOORD_@ YKOORD_@, i(p_id) j(year)

sort p_id year

gen years_same=XK==XK[_n-1] & YK==YK[_n-1] if p_id[_n-1]==p_id

forvalues years=2/20{
	replace years_same=`years' if years_same==`years'-1 & XK[_n-`years'] & YK==YK[_n-`years'] & p_id[_n-`years']==p_id
}

			gen valar=year

	joinby p_id valar using "$dat\mun politician data.dta", unmatched(none) 

	gen parti_low=lower(parti_initial)
	
*generarar variabel för antalter nominerade och valda från varje parti
foreach parti in b m c k f l g s v{
gen vald_`parti'= vald==1 & parti_low =="`parti'" 
gen vald_ab5sm_`parti'= vald==1 & parti_low =="`parti'" & years_same>=5 & years_same!=.
gen vald_bel5sm_`parti'= vald==1 & parti_low =="`parti'" & (years_same<5 .)
gen vald_ab10sm_`parti'= vald==1 & parti_low =="`parti'" & years_same>=10 & years_same!=.
gen vald_bel10sm_`parti'= vald==1 & parti_low =="`parti'" & (years_same<10 .)
gen vald_ab15sm_`parti'= vald==1 & parti_low =="`parti'" & years_same>=15 & years_same!=.
gen vald_bel15sm_`parti'= vald==1 & parti_low =="`parti'" & (years_same<15 .)
 }
 
joinby  p_id year using "$dat\dist p_id year.dta", unmatched(none)
do "$do\mun code clean.do" 
 collapse (sum) vald vald_*  , by (llkk dist year)
 
 

gen valar=year
joinby llkk valar using "$dat\kommundata.dta", unmatched(master)
	foreach gr in vald{
 gen `gr'_maj =mmaj2*`gr'_m +  cmaj2*`gr'_c +  fpmaj2*`gr'_f + kdmaj2*`gr'_k + mpmaj2*`gr'_g + smaj2*`gr'_s + vmaj2*`gr'_v
 
 gen `gr'_min= (`gr'_m + `gr'_c + `gr'_f + `gr'_k + `gr'_g + `gr'_s + `gr'_v)-`gr'_maj
 
 foreach pow in maj min{
 		bysort llkk year:  egen kom_`gr'_`pow'=sum(`gr'_`pow')
		drop `gr'_`pow'
 }
 
	}
	foreach gr in vald_ab5sm vald_bel5sm vald_ab10sm vald_bel10sm vald_ab15sm vald_bel15sm{
 gen `gr'_maj =mmaj2*`gr'_m +  cmaj2*`gr'_c +  fpmaj2*`gr'_f + kdmaj2*`gr'_k + mpmaj2*`gr'_g + smaj2*`gr'_s + vmaj2*`gr'_v
 gen `gr'_min= (`gr'_m + `gr'_c + `gr'_f + `gr'_k + `gr'_g + `gr'_s + `gr'_v)-`gr'_maj
 
 foreach pow in maj min{
 		bysort llkk year:  egen kom=sum(`gr'_`pow')
* calculate share of group living in the district
	gen sh_`gr'_`pow' =`gr'_`pow' /kom
	gen shalt_`gr'_`pow' =`gr'_`pow' /kom_vald_`pow'
	drop kom
 } 
 gen diff_maj_min_`gr'=sh_`gr'_maj-sh_`gr'_min
  gen diffalt_maj_min_`gr'=shalt_`gr'_maj-shalt_`gr'_min
 drop sh_`gr'_maj  sh_`gr'_min shalt_`gr'_maj  shalt_`gr'_min `gr'_maj `gr'_min
 }
 
 keep year llkk dist diff*
 save "$dat\timesame_neighbours.dta", replace


 
*****  relatives ****

	use "$dat\mun politician data.dta", clear
	keep if valar>2000 & valar<2011 & vald==1
g year=valar
	save "$dat\relatives_neighbours.dta", replace
	clear
	odbc load, exec("select * from BioForaldrar") dsn("P0624")
	ren LopNr p_id
	joinby p_id using "$dat\relatives_neighbours.dta", unmatched (using)
	
	ren p_id pol_id
	ren LopNr_BioMor  p_id
	ren llkk llkk_pol
	drop _merge
		joinby  p_id year using "$dat\dist p_id year.dta", unmatched(master)
		drop  _merge vdnamn
		ren llkk llkk_mo
		ren dist dist_mo
		drop p_id
		ren 	LopNr_BioFar  p_id

		joinby  p_id year using "$dat\dist p_id year.dta", unmatched(master)
		drop _merge vdnamn
		ren llkk llkk_fa
		ren dist dist_fa
					drop p_id
					
					ren pol_id p_id
						save "$dat\relatives_neighbours.dta", replace
	clear
	odbc load, exec("select * from BioSyskon") dsn("P0624")
	ren LopNr p_id
	joinby p_id using "$dat\relatives_neighbours.dta", unmatched (using)
	
	ren p_id pol_id
	
	ren LopNr_BioSyskon  p_id

	drop _merge
		joinby  p_id year using "$dat\dist p_id year.dta", unmatched(master)
		drop _merge vdnamn
		ren llkk llkk_sib
		ren dist dist_sib
		drop p_id
		ren pol_id p_id 
				joinby  p_id year using "$dat\dist p_id year.dta", unmatched(master)
				
				
				gen same_mo=dist==dist_mo & llkk==llkk_mo
				gen same_fa=dist==dist_fa & llkk==llkk_fa
				gen same_sib=dist==dist_sib & llkk==llkk_sib
				
			bysort p_id year : egen same_sib_sum=sum(same_sib)
			gen same_rel= same_fa+ same_mo+ same_sib_sum
			duplicates drop p_id year, force
			
			
	gen parti_low=lower(parti_initial)
	
*generarar variabel för antalter nominerade och valda från varje parti
foreach parti in b m c k f l g s v{
gen vald_`parti'= vald==1 & parti_low =="`parti'" 
gen vald_rel_`parti'= vald==1 & parti_low =="`parti'" & same_rel>=1 & same_rel!=.
gen vald_norell_`parti'= vald==1 & parti_low =="`parti'" & (same_rel<1 |  same_rel==.)

 }
 
 do "$do\mun code clean.do" 
 collapse (sum) vald vald_*  , by (llkk dist year)
 
gen valar=year
joinby llkk valar using "$dat\kommundata.dta", unmatched(master)
	foreach gr in vald{
 gen `gr'_maj =mmaj2*`gr'_m +  cmaj2*`gr'_c +  fpmaj2*`gr'_f + kdmaj2*`gr'_k + mpmaj2*`gr'_g + smaj2*`gr'_s + vmaj2*`gr'_v
 gen `gr'_min= (`gr'_m + `gr'_c + `gr'_f + `gr'_k + `gr'_g + `gr'_s + `gr'_v)-`gr'_maj
 
 foreach pow in maj min{
 		bysort llkk year:  egen kom_`gr'_`pow'=sum(`gr'_`pow')
		drop `gr'_`pow'
 }
 
	}
	foreach gr in vald_rel vald_norell{
 gen `gr'_maj =mmaj2*`gr'_m +  cmaj2*`gr'_c +  fpmaj2*`gr'_f + kdmaj2*`gr'_k + mpmaj2*`gr'_g + smaj2*`gr'_s + vmaj2*`gr'_v
 gen `gr'_min= (`gr'_m + `gr'_c + `gr'_f + `gr'_k + `gr'_g + `gr'_s + `gr'_v)-`gr'_maj
 
 foreach pow in maj min{
 		bysort llkk year:  egen kom=sum(`gr'_`pow')
		
* calculate share of group living in the district
	gen sh_`gr'_`pow' =`gr'_`pow' /kom
	gen shalt_`gr'_`pow' =`gr'_`pow' /kom_vald_`pow'
	drop kom
 } 
 gen diff_maj_min_`gr'=sh_`gr'_maj-sh_`gr'_min
  gen diffalt_maj_min_`gr'=shalt_`gr'_maj-shalt_`gr'_min
 drop sh_`gr'_maj  sh_`gr'_min shalt_`gr'_maj  shalt_`gr'_min `gr'_maj `gr'_min
 }
 
keep year llkk dist diff*
save "$dat\relatives_neighbours.dta", replace

	

***************************
***** Election results  ***
***************************	
** data on election results from Valmyndigheten 2002, 2006, 2010***

use "$dat\parl dist 2010.dta" , clear
drop llkk
ren om lan
tostring lan kom , replace force
forvalues n=1/9{
replace kom="0`n'" if  kom=="`n'"
}
egen llkk=concat (lan kom)
destring llkk, force replace

joinby llkk dist using "$dat\mun dist 2010.dta", unmatched(both) _merge(_merge)


foreach p in m c f k s v g b{

replace  rost`p'=0 if rost`p'==.
replace  rostr`p'=0 if rostr`p'==.
 gen vsr_`p'=rostr`p'/rostrgil
 gen vsm_`p'=rost`p'/rostgil
 ren  rost`p' rostm`p'
 }
gen part_r=rostrgil /rostrber
gen part_kr=rostgil /rostber
 
 ren rostrgil rostr_gil 
 ren rostgil rostm_gil
 ren rostber rostm_ber
 ren valdistriktnam vdnamn
 keep llkk dist vsr* vsm* part* vdnamn rostm* rostr*
 gen year=2010
 gen valar=2010


do "$do\mun code clean.do" 
 save "$temp\dist_election_2010.dta", replace

 use "$dat\roster parti distrikt alla val 2006.dta" , clear
ren VALTYP val
ren VALDIST dist

tostring LAN KOM , replace force
forvalues n=1/9{
replace KOM="0`n'" if  KOM=="`n'"
}

egen llkk=concat( LAN KOM)
destring llkk, force replace
ren TAL_ROST rost

replace val=strlower(val)
gen parti_initial="m" if FORKORTN=="M"
replace parti_initial="b" if FORKORTN=="SD"
replace parti_initial="f" if FORKORTN=="FP"
replace parti_initial="c" if FORKORTN=="C"
replace parti_initial="g" if FORKORTN=="MP"
replace parti_initial="s" if FORKORTN=="S"
replace parti_initial="v" if FORKORTN=="V"
replace parti_initial="k" if FORKORTN=="KD"
replace parti_initial="o" if FORKORTN=="BLANK" | FORKORTN=="Og"
replace parti_initial="l" if parti_initial==""
drop if parti_initial =="o"

collapse (sum) rost , by(val llkk  dist parti_initial)

reshape wide rost@,j(val) i( llkk  dist parti_initial) string
ren rostk rostm

foreach var in rostm rostl rostr{
bysort llkk dist: egen `var'_gil=sum(`var')
}
reshape wide rostr@ rostm@ rostl@,j( parti_initial) i( llkk  dist rostm_gil rostl_gil rostr_gil) string


foreach p in m c f k s v g b l{


replace  rostm`p'=0 if rostm`p'==.
replace  rostr`p'=0 if rostr`p'==.


 gen vsr_`p'=rostr`p'/rostr_gil
 gen vsm_`p'=rostm`p'/rostm_gil
 replace vsr_`p'=0  if  vsr_`p'==. & rostr_gil!=.
 replace vsm_`p'=0  if  vsm_`p'==. & rostm_gil!=.
 }

 keep llkk dist vsr* vsm*  rostr* rostm*
 gen year=2006
 gen valar=2006
 do "$do\mun code clean.do" 
 save "$temp\dist_election_2006", replace
 
 
 use "$dat\roster parti distrikt alla val 2002.dta" , clear
ren VALTYP val
ren VALDIST dist
tostring LAN KOM , replace force
forvalues n=1/9{
replace KOM="0`n'" if  KOM=="`n'"
}
egen llkk=concat( LAN KOM)
destring llkk, force replace
ren TAL_ROST rost

replace val=strlower(val)
gen parti_initial="m" if FORKORTN=="M"
replace parti_initial="b" if FORKORTN=="SD"
replace parti_initial="f" if FORKORTN=="FP"
replace parti_initial="c" if FORKORTN=="C"
replace parti_initial="g" if FORKORTN=="MP"
replace parti_initial="s" if FORKORTN=="S"
replace parti_initial="v" if FORKORTN=="V"
replace parti_initial="k" if FORKORTN=="KD"
replace parti_initial="o" if FORKORTN=="BLANK" | FORKORTN=="Og"
replace parti_initial="l" if parti_initial==""
drop if parti_initial =="o"

collapse (sum) rost , by(val llkk  dist parti_initial)

reshape wide rost@,j(val) i( llkk  dist parti_initial) string
ren rostk rostm

foreach var in rostm rostl rostr{
bysort llkk dist: egen `var'_gil=sum(`var')

}
reshape wide rostr@ rostm@ rostl@,j( parti_initial) i( llkk  dist rostm_gil rostl_gil rostr_gil) string

foreach p in m c f k s v g b l{

replace  rostm`p'=0 if rostm`p'==.
replace  rostr`p'=0 if rostr`p'==.
 gen vsr_`p'=rostr`p'/rostr_gil
 gen vsm_`p'=rostm`p'/rostm_gil
 replace vsr_`p'=0  if  vsr_`p'==. & rostr_gil!=.
 replace vsm_`p'=0  if  vsm_`p'==. & rostm_gil!=.
 }

 keep llkk dist vsr* vsm* rostm* rostr* rostr_gil rostm_gil
 gen year=2002
 gen valar=2002
 do "$do\mun code clean.do" 
 save "$temp\dist_election_2002.dta", replace
 append using "$temp\dist_election_2006.dta"
 append using "$temp\dist_election_2010.dta"
save "$dat\dist_election.dta", replace
	
	
*************************
***** House prices    ***
*************************


use "$dat\all_price_transform", clear // year 2005-2014
drop id
duplicates drop // 0 obs
g temp=_n
****************************************************
** Drop if does not look like residential housing **
****************************************************

drop if ad_year>year & !mi(ad_year) // 400 obs 

ren price_per_sq_meter price_sqm
destring price_sqm, replace 
g price_sqm2=contract_price/living_area if !mi(living_area) & !mi(contract_price) & living_area!=0 & contract_price!=0
replace price_sqm2=round(price_sqm2)
replace price_sqm=. if price_sqm<500 & !mi(price_sqm) 
replace price_sqm=. if price_sqm>200000 & !mi(price_sqm) 
replace price_sqm=price_sqm2 if mi(price_sqm) 
drop price_sqm2

drop if housing_category=="T" //plot 11 000 obs, only include house+summer house+apartments
drop if mi(price_sqm) // 36,000 obs 
drop if price_sqm<500 
drop if price_sqm>200000 // 70 obs

******************************************************************************
*drop observations with more than 1 km between mstat- and google-coordinates
*******************************************************************************
* Double checked coordinates and adresses with google only exists for observations with coordinates, adress and county information in maklar data
ren location_wgs84_geopt_lon lon_wgs84
ren location_wgs84_geopt_lat lat_wgs84
ren google_geometry_location_lon google_lon_wgs84
ren google_geometry_location_lat google_lat_wgs84
ren location_rt90_geopt_lat lat_rt90
ren location_rt90_geopt_lon lon_rt90

tab year if google_min_dist_km>=1 & !mi(google_min_dist_km) // more than 1 km off 34 000 oba, overrepresented for summer house 
drop if google_min_dist_km>=1 & !mi(google_min_dist_km)

*check that coordinates are within Sweden
sum lon_rt90
sum google_lon_rt90
sum lat_rt90
sum google_lat_rt90

replace lat_rt90= google_lat_rt90 if google_min_dist_km<1 & google_min_dist_km>0.1 // replace with google coordinates 67000 obs
replace lon_rt90= google_lon_rt90 if google_min_dist_km<1 & google_min_dist_km>0.1 

***********************
*** Check duplicates **
***********************
* duplicats with the same contract date, address, and price 
* could be new production, or multiple apartments in same building, but could also be error due to information reported from different sources, usually just 1 dup, looks like it has additional info from the ad 

duplicates tag  formatted_address municipality plot_area living_area contract_price price_sqm contract_date housing_category apartment_number apartment_floor, g(dup)
tab dup // 4 %, only 3 & of them are new production
drop dup
sort  formatted_address municipality plot_area living_area contract_price price_sqm   contract_date housing_category apartment_number apartment_floor  lat_rt90 lon_rt90 temp
duplicates drop   formatted_address municipality plot_area living_area contract_price price_sqm contract_date housing_category apartment_number apartment_floor, force

********************************
*** Create some new variables **
********************************

drop apart* 
g apartment=(housing_category=="B") if  !mi(housing_category)
gen sold_objects=1


foreach x in price_sqm   sold_objects {
	gen  `x'_new=  `x' if  new_production ==1
	gen  `x'_old=  `x' if  new_production ==0
	gen  `x'_a=  `x' if housing_category=="B" & new_production ==0
	gen  `x'_h=  `x' if housing_category=="V" & new_production ==0
}




***********************************************
** Create 500 m sq (coordinates=left corner) **
***********************************************

g xcoord= lat_rt90
g ycoord= lon_rt90
replace ycoord=int(ycoord/500)*500
replace xcoord=int(xcoord/500)*500


gen electionperiod=1998 if year>1999 & year<2003
replace  electionperiod=2002 if year>2002 & year<2007
replace  electionperiod=2006 if year>2006 & year<2011
replace  electionperiod=2010 if year>2010 & year<2015
replace  electionperiod=2014 if year>2014 & year<2019

*******************************************
** Join with precinct data. This is done **
** for the 2002, 2006 and 2010 precincts **
*******************************************



foreach year in 2002 2006 2010 {
preserve
joinby xcoord ycoord using "$dat\xy_valdist_`year'.dta", unmatched(none)
ren Valdist`year' Valdist
gen llkk=substr(Valdist, 1, 4) 
gen dist=substr(Valdist, 5, 4) 
destring llkk dist, replace
do "$do\mun code clean.do"
sort electionperiod llkk dist temp
collapse (mean) price_sqm* living_area*    (sum) sold_objects* ,  by(electionperiod llkk dist)

sort llkk dist election
foreach var of varlist price_sqm-sold_objects_h {

gen `var'_lag = `var'[_n-1] if dist==dist[_n-1] & llkk==llkk[_n-1]  & election==(election[_n-1] +4)
gen `var'_ch = `var'-`var'_lag 

gen `var'_lead = `var'[_n+1] if dist==dist[_n+1]  & llkk==llkk[_n-1] & election==(election[_n+1]-4)

}
keep if election==`year' 
gen year=election
if `year'==2002 save "$dat\prices_precinct_ep.dta", replace
else append using  "$dat\prices_precinct_ep.dta"
save "$dat\prices_precinct_ep.dta", replace
restore
} 



*************************
*****    Home owners  ***
*************************
use "$dat\tenure500_midpoint.dta", clear

ren ostruta500  ycoord
ren nordruta500 xcoord
joinby xcoord ycoor  using "$dat\xy_valdist_2002.dta" , unmatched(master) 
drop _merge
joinby xcoord ycoor  using "$dat\xy_valdist_2006.dta" , unmatched(master) 
drop _merge
joinby xcoord ycoor  using "$dat\xy_valdist_2010.dta" , unmatched(master) 
drop _merge
gen  electionperiod=2002 if year==2002
replace  electionperiod=2006 if year==2006 
replace  electionperiod=2010 if year ==2010
replace  electionperiod=2014 if year==2014
keep if electionperiod!=.


foreach year in 2002 2006 2010{
preserve 
drop if mi(Valdist`year')
ren Valdist`year' Valdist
gen llkk=substr(Valdist, 1, 4) 
gen dist=substr(Valdist, 5, 4) 
destring llkk dist, replace
do "$do\mun code clean.do" 
drop if mi(dist)
keep if electionperiod==`year'
collapse (sum) pop owning_house- agriculture ,  by(electionperiod llkk dist)
gen year=election
if `year'==2002 save "$dat\tenure_precinct_ep.dta", replace
else append using "$dat\tenure_precinct_ep.dta"
save "$dat\tenure_precinct_ep.dta", replace
restore
} 


********************************
*****  Building permits      ***
********************************

odbc load, exec("SELECT * FROM Bygglov") dsn("P0624") clear
gen year=substr(ArManBygglovBeslut, 1, 4)
gen month=substr(ArManBygglovBeslut, 5, 2)
destring year month, replace

gen YearMonth_sif=ym(year, month)
format YearMonth %tm

g county=regexs(0) if regexm(Kommun, "^[0-9][0-9]")
destring county, replace
destring Kommun, replace


*** Generating data sets with aggregated number of building perm ***
* Use the years 2002--2014, drop some missing information on building type and valdistrikt and drop some variables


* Aggregated over election period
gen election_period=1994 if  ArManBygglovBeslut<"199810"
replace election_period=1998 if ArManBygglovBeslut>"199810" & ArManBygglovBeslut<"200210"

 replace election_period= 2002 if ArManBygglovBeslut>"200209" & ArManBygglovBeslut<"200610"
replace election_period=2006 if ArManBygglovBeslut>"200609" & ArManBygglovBeslut<"201010"
 replace election_period=2010 if ArManBygglovBeslut>"201009" & ArManBygglovBeslut<"201410"
 replace election_period=2014 if ArManBygglovBeslut>"201409" & ArManBygglovBeslut<"201810"


gen perm_tot=  BruttoAreaTot

ren AntLgh no_apt


** Number of perm, divided by type of permit
foreach typ in 11 12 13 19 21  23 31 32 33 34 35 39 41 44 51 52  {
 
gen perm_`typ'=1 if  HuvudbyggnadsTyp==`typ'  

}
ren perm_11 perm_onefam
ren perm_12 perm_twofam
ren perm_13 perm_chain
ren perm_19 perm_vaca
ren perm_21 perm_multi


***
foreach typ in  onefam twofam chain multi  {
gen area_`typ'= BruttoAreaTot  if perm_`typ'==1
}

gen area_single = .
gen no_single = .
foreach typ in  onefam twofam chain   {
replace area_single= BruttoAreaTot  if perm_`typ'==1
replace no_single= no_apt  if perm_`typ'==1
}

*** save statistics to use with appeals data in section A6 ****
preserve
keep if year==2017
gen county_num=substr(Valdist2014, 1, 2) 
destring county_num, replace
bys county_num: egen multifam=sum(perm_multi)
replace perm_onefam=perm_twofam if mi(perm_onefam)
replace perm_onefam=perm_chain if mi(perm_onefam)
bys county_num: egen onefam=sum(perm_onefam)
keep county_num multifam onefam
drop if mi(county_num)
duplicates drop
save "$dat\permits_county_2017.dta", replace 
restore
*********************


foreach val in 2002 2006 2010 {
preserve
keep area_multi area_single election_period Valdist`val'
ren Valdist`val' Valdist
collapse (sum) area_multi area_single , by (election_period Valdist )
fillin election Valdist
sort Valdist election
foreach var of varlist area_multi area_single {
replace  `var'=0 if  `var'==.
gen `var'_lag = `var'[_n-1] if Valdist==Valdist[_n-1]   & election==(election[_n-1] +4)
replace  `var'_lag=0 if  `var'_lag==.
gen `var'_lead = `var'[_n+1] if Valdist==Valdist[_n+1]  & election==(election[_n+1]-4)
replace  `var'_lead=0 if  `var'_lead==.
}
gen llkk=substr(Valdist, 1, 4) 
gen dist=substr(Valdist, 5, 4) 
destring llkk dist, replace
do "$do\mun code clean.do" // ok to do after collapse since no variation in municipality code within year
keep if election==`val'
gen year=election
if `val'==2002 save "$dat\permits.dta", replace
else append using  "$dat\permits.dta"
save "$dat\permits.dta", replace
restore
} 

use  "$dat\permits.dta", clear

merge m:1 year llkk dist using "$dat\valdistrikt_alla.dta"

drop if dist==0
drop if dist==9999
foreach var of varlist  area_multi area_single   area_multi_lag- area_single_lead {
replace `var'=0 if _merge==2
}

replace election_period=year if _merge==2
drop _merge _fillin
sum area_multi area_single // 17 630 obs
save "$dat\permits.dta", replace
 
 


***********************
****    Equipop    ****
***********************
* Tar ut data för att skicka in i EquiPop
 
 foreach year in 2002 2006 2010 {
* Geo:
use "$dat\coordinates.dta", clear
keep if year==`year'
ren id p_id
keep p_id sams xcoord ycoord llkk year
do "$do\mun code clean.do"
ren llkk kommunkod
ren xcoord XKOORD_`year'
ren ycoord YKOORD_`year'
destring sams, gen(sams_`year') force 
gen valar=`year'
sum 

* Ålder och födelseregion
joinby p_id year using "$dat\lisa_togeo.dta", unmatched(none) 
keep if age>17

* Politik:
joinby p_id valar using "$dat\mun politician data.dta", unmatched(master) 
drop _merge

* Generate some variables
gen politician_`year'=0
replace politician_`year'=1 if  vald==1 
drop if sams_`year'==.
bysort sams_`year': egen sams_pop=count(p_id)
sum sams_pop, det

gen highed=educ_year>13

gen ethnic_maj= 1-inv1g_eje

fasterxtile pct=inc, n(100)
gen highq= pct>75 & pct!=.

save "$dat\MergedData\Merged_`year'.dta", replace
}



* High-educated for EquiPop
foreach year in 2002 2006 2010 {
use "$dat\MergedData\Merged_`year'.dta", clear
sort XKOORD_`year' YKOORD_`year'
by XKOORD_`year' YKOORD_`year': egen pop=count(p_id)
replace pop=0 if pop==.
by XKOORD_`year' YKOORD_`year': egen pop_highed=count(p_id) if highed==1
replace pop_highed=0 if pop_highed==.
 
collapse (max) p_id pop pop_highed, by(XKOORD_`year' YKOORD_`year')
sum
export delimit "$dat\To_EquiPop\highed_`year'.dat", delimit(tab) replace
}



* High-earners (upper quartile) for EquiPop
foreach year in 2002 2006 2010 {
use "$dat\MergedData\Merged_`year'.dta", clear
sort XKOORD_`year' YKOORD_`year'
by XKOORD_`year' YKOORD_`year': egen pop=count(p_id)
replace pop=0 if pop==.
by XKOORD_`year' YKOORD_`year': egen pop_highq=count(p_id) if highq==1
replace pop_highq=0 if pop_highq==.
 
collapse (max) p_id pop pop_highq, by(XKOORD_`year' YKOORD_`year')
sum
export delimit "$dat\To_EquiPop\highq_`year'.dat", delimit(tab) replace
}


* Ethnic majority for EquiPop
foreach year in 2002 2006 2010 {
use "$dat\MergedData\Merged_`year'.dta", clear
sort XKOORD_`year' YKOORD_`year'
by XKOORD_`year' YKOORD_`year': egen pop=count(p_id)
replace pop=0 if pop==.
by XKOORD_`year' YKOORD_`year': egen pop_ethnic_maj=count(p_id) if ethnic_maj==1
replace pop_ethnic_maj=0 if pop_ethnic_maj==.
 
collapse (max) p_id pop pop_ethnic_maj, by(XKOORD_`year' YKOORD_`year')
sum
export delimit "$dat\To_EquiPop\ethnic_maj_`year'.dat", delimit(tab) replace
}

log close